This repository contains the scripts for generating the RNA-Seq data analysis for “Corticosteroids activate Gs and cAMP production via a rapid non-genomic mechanism that contributes to a subset of the canonical genomic effects.” Goal of this document is to validate the GNAS(Gs) knockdown and to identify differentially expressed genes due to budesonide treatment for each experimental condition. We processed the single end Illumina reads that are 100 bp long with Rsubread, an open source R package. The gene counts are used to identify the differentially expressed genes with DESeq package.
featureCounts<-as.matrix(read.table("~/Desktop/Glucocorticoids-bioinformatics/Matrices/Matrices/Gs_HASM.featurecounts_1_symbol.txt", sep='\t', stringsAsFactors=FALSE, header=1, row.names=1))
#Filtering gene that has counts less than 100 for 50% of the samples
featureCounts_f1<-featureCounts[apply(featureCounts[,1:24]>100,1,mean) >=0.5,]
pts<-c(1,5,9,13,17,21)
contr<-subset(featureCounts_f1,select=pts)
contr_budesonide<-subset(featureCounts_f1,select=(pts+1))
gs_kd<-subset(featureCounts_f1,select=(pts+2))
gs_kd_budesonide<-subset(featureCounts_f1,select=(pts+3))
condition<- factor(c(rep("contr",6),rep("contr_budesonide",6),rep("gs_kd",6),rep("gs_kd_budesonide",6)))
hasm_de<-function(data, condition, FCCutOff=1, padjCutOff=0.001,pathways=c2,dge=T){
hasm_dds <- DESeqDataSetFromMatrix(countData =data,DataFrame(condition), design= ~condition)
hasm_dds <- DESeq(hasm_dds)
res <- data.frame(results(hasm_dds, contrast =c("condition",levels(condition))))
plotMA(hasm_dds)
de<-na.exclude(res)
fgsea_analysis(res, pathways,dge)
de<-de[(abs(de$log2FoldChange)>FCCutOff),]
de<-de[(de$padj<padjCutOff),]
de_ord<-de[order(de$log2FoldChange,decreasing = 0),]
boxplot(data[rownames(de_ord)[1],]~condition,ylab="FeatureCounts",main=paste(rownames(de_ord)[1], "log2FC =", de_ord[1,2]))
return(de_ord)
}
tpm<-as.matrix(log2(read.table("~/Desktop/Glucocorticoids-bioinformatics/Matrices/Matrices/Gs_HASM.tpm_1_symbol.txt", sep='\t', stringsAsFactors=FALSE, header=1, row.names=1)+1))
tpm_f1<-tpm[rownames(tpm)%in%rownames(featureCounts_f1),]
conditions<-c(rep(c("cont","cont_bud","GNAS_kd","GNAS_kd_bud"),6))
for(i in 1:nrow(hasm_de_c1)){
boxplot(t(as.data.frame(tpm)[row.names(hasm_de_c1)[i],])~conditions, main=row.names(hasm_de_c1)[i],ylab="Log2 TPM Count",ylim=c(0,12))
}
for(i in 1:nrow(hasm_de_c2)){
boxplot(t(as.data.frame(tpm)[row.names(hasm_de_c2)[i],])~conditions, main=row.names(hasm_de_c2)[i],ylab="Log2 TPM Count",ylim=c(0,12))
}
#checking patient specific effect
require(graphics)
pca_mat <- prcomp(t(tpm_f1))
plot(pca_mat)
plot(pca_mat$x[,1],pca_mat$x[,2], main="Top 2 PCs in 6 Pts", pch=2,col=2)
abline(h=0,v=0)
points(pca_mat$x[1:4,1],pca_mat$x[1:4,2],col=2, pch=2,lwd=3)
points(pca_mat$x[5:8,1],pca_mat$x[5:8,2],col=3, pch=2,lwd=3)
points(pca_mat$x[9:12,1],pca_mat$x[9:12,2],col=4, pch=2,lwd=3)
points(pca_mat$x[13:16,1],pca_mat$x[13:16,2],col=5, pch=2,lwd=3)
points(pca_mat$x[17:20,1],pca_mat$x[17:20,2],col=6, pch=2,lwd=3)
points(pca_mat$x[21:24,1],pca_mat$x[21:24,2],col=7, pch=2,lwd=3)
legend(18,-6,legend=c("Pt_1", "Pt_2", "Pt_3","Pt_4", "Pt_5","Pt_6"), col=c(2:7), pch = 2,pt.lwd = 3)
patients<-c(rep("Pt1",4),rep("Pt2",4),rep("Pt3",4),rep("Pt4",4),rep("Pt5",4),rep("Pt6",4))
boxplot(pca_mat$x[,1]~patients)
boxplot(pca_mat$x[,2]~patients)
boxplot(pca_mat$x[,3]~patients)#patient specific variation
boxplot(pca_mat$x[,4]~patients)
boxplot(pca_mat$x[,5]~patients)
#checking condition specific effect
plot(pca_mat)
plot(pca_mat$x[,1],pca_mat$x[,2], main="Top 2 PCs across\n Tx Conditions")
abline(h=0,v=0)
c_i<-c(1,5,9,13,17,21)
points(pca_mat$x[c_i,1],pca_mat$x[c_i,2],col=2, lwd=3)
points(pca_mat$x[c_i+1,1],pca_mat$x[c_i+1,2],col=3, lwd=3)
points(pca_mat$x[c_i+2,1],pca_mat$x[c_i+2,2],col=4, lwd=3)
points(pca_mat$x[c_i+3,1],pca_mat$x[c_i+3,2],col=5, lwd=3)
legend(11,-13,legend=c("contrl", "contrl+budesonide", "GNAS-kd","GNAS-kd+budesonide"), col=c(2,3,4,5), pch = 1,pt.lwd = 3)
contr_t<-subset(tpm_f1,select=pts)
contr_budesonide_t<-subset(tpm_f1,select=(pts+1))
gs_kd_t<-subset(tpm_f1,select=(pts+2))
gs_kd_budesonide_t<-subset(tpm_f1,select=(pts+3))
tpm<-cbind(contr_t, contr_budesonide_t,gs_kd_t,gs_kd_budesonide_t)
pca_mat <- prcomp(t(tpm))
boxplot(pca_mat$x[,1]~conditions)
boxplot(pca_mat$x[,2]~conditions)
boxplot(pca_mat$x[,3]~conditions)
boxplot(pca_mat$x[,4]~conditions)
boxplot(pca_mat$x[,5]~conditions)
## Creating Budesonide signatures with differentially expressed gene list using ASSIGN and predicting budesonide transcriptional activity
##Correlation analysis of predicted budesonide activity from control and Gs-knockdown signatures
##
## Welch Two Sample t-test
##
## data: read.csv("~/Desktop/Glucocorticoids-bioinformatics/050719/budesonide_nogen_gen_de_140/pathway_activity_testset.csv")[, and read.csv("~/Desktop/Glucocorticoids-bioinformatics/050719/budesonide_gen_de_121/pathway_activity_testset.csv")[, 2] and 2]
## t = 0.051002, df = 45.967, p-value = 0.9595
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2709810 0.2850697
## sample estimates:
## mean of x mean of y
## 0.5129062 0.5058618
##
## Pearson's product-moment correlation
##
## data: read.csv("~/Desktop/Glucocorticoids-bioinformatics/050719/budesonide_nogen_gen_de_140/pathway_activity_testset.csv")[, and read.csv("~/Desktop/Glucocorticoids-bioinformatics/050719/budesonide_gen_de_121/pathway_activity_testset.csv")[, 2] and 2]
## t = 69.452, df = 22, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9946622 0.9990332
## sample estimates:
## cor
## 0.9977273
This analysis was run on Thu May 09 16:07:22 2019